import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn
import warnings
warnings.filterwarnings('ignore')
# Parse the raw CSV; `data` keeps the frame exactly as pandas read it.
data = pd.read_csv('terrorist-attacks.csv')
# `df` is the working handle that the later cells operate on.
df = pd.DataFrame(data)
# Preview the first five rows (five is head()'s default).
df.head()
| Entity | Year | Terrorist attacks | Terrorism deaths | Attack method: Hijacking | Attack method: Hostage Taking (Barricade Incident) | Attack method: Unarmed Assault | Attack method: Facility/Infrastructure Attack | Attack method: Hostage Taking (Kidnapping) | Attack method: Assassination | Attack method: Armed Assault | Attack method: Bombing/Explosion | Death Age 100+ | Death Age: 51-99 | Death Age : 21-50 | Death Age : 11-20 | Death Age : 6-10 | Death Age : 1-5 | Terrorist Death Type : Suicide | Terrorist Death Type : Killed | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 1979 | 3 | 53 | 3 | 6 | 5 | 24 | 1 | 52 | 1 | 1 | 2 | 7 | 1 | 17 | 25 | 2 | 402 | 53 |
| 1 | Afghanistan | 1988 | 11 | 128 | 3 | 6 | 5 | 24 | 30 | 52 | 129 | 11 | 2 | 7 | 3 | 1 | 2 | 4 | 402 | 128 |
| 2 | Afghanistan | 1989 | 10 | 10 | 3 | 6 | 5 | 24 | 30 | 1 | 129 | 9 | 2 | 7 | 11 | 17 | 1 | 1 | 402 | 10 |
| 3 | Afghanistan | 1990 | 2 | 12 | 3 | 6 | 5 | 24 | 30 | 52 | 129 | 2 | 2 | 7 | 11 | 17 | 2 | 3 | 402 | 12 |
| 4 | Afghanistan | 1991 | 30 | 68 | 3 | 6 | 5 | 24 | 4 | 1 | 2 | 23 | 2 | 7 | 11 | 4 | 25 | 7 | 402 | 68 |
# (rows, columns) of the loaded frame.
df.shape
(3580, 20)
# Column labels. Note that several 'Death Age' labels end with a trailing
# space, so they must be quoted exactly when indexing.
df.columns
Index(['Entity', 'Year', 'Terrorist attacks', 'Terrorism deaths',
'Attack method: Hijacking',
'Attack method: Hostage Taking (Barricade Incident)',
'Attack method: Unarmed Assault',
'Attack method: Facility/Infrastructure Attack',
'Attack method: Hostage Taking (Kidnapping)',
'Attack method: Assassination', 'Attack method: Armed Assault',
'Attack method: Bombing/Explosion', 'Death Age 100+',
'Death Age: 51-99 ', 'Death Age : 21-50 ', 'Death Age : 11-20 ',
'Death Age : 6-10 ', 'Death Age : 1-5',
'Terrorist Death Type : Suicide', 'Terrorist Death Type : Killed'],
dtype='object')
# Per-column dtype and non-null count summary.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3580 entries, 0 to 3579 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Entity 3580 non-null object 1 Year 3580 non-null int64 2 Terrorist attacks 3580 non-null int64 3 Terrorism deaths 3580 non-null int64 4 Attack method: Hijacking 3580 non-null int64 5 Attack method: Hostage Taking (Barricade Incident) 3580 non-null int64 6 Attack method: Unarmed Assault 3580 non-null int64 7 Attack method: Facility/Infrastructure Attack 3580 non-null int64 8 Attack method: Hostage Taking (Kidnapping) 3580 non-null int64 9 Attack method: Assassination 3580 non-null int64 10 Attack method: Armed Assault 3580 non-null int64 11 Attack method: Bombing/Explosion 3580 non-null int64 12 Death Age 100+ 3580 non-null int64 13 Death Age: 51-99 3580 non-null int64 14 Death Age : 21-50 3580 non-null int64 15 Death Age : 11-20 3580 non-null int64 16 Death Age : 6-10 3580 non-null int64 17 Death Age : 1-5 3580 non-null int64 18 Terrorist Death Type : Suicide 3580 non-null int64 19 Terrorist Death Type : Killed 3580 non-null int64 dtypes: int64(19), object(1) memory usage: 559.5+ KB
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()
| Year | Terrorist attacks | Terrorism deaths | Attack method: Hijacking | Attack method: Hostage Taking (Barricade Incident) | Attack method: Unarmed Assault | Attack method: Facility/Infrastructure Attack | Attack method: Hostage Taking (Kidnapping) | Attack method: Assassination | Attack method: Armed Assault | Attack method: Bombing/Explosion | Death Age 100+ | Death Age: 51-99 | Death Age : 21-50 | Death Age : 11-20 | Death Age : 6-10 | Death Age : 1-5 | Terrorist Death Type : Suicide | Terrorist Death Type : Killed | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 3580.000000 | 3580.000000 | 3580.000000 | 3580.000000 | 3580.000000 | 3580.000000 | 3580.000000 | 3580.000000 | 3580.000000 | 3580.000000 | 3580.000000 | 3580.000000 | 3580.000000 | 3580.000000 | 3580.000000 | 3580.000000 | 3580.000000 | 3580.000000 | 3580.000000 |
| mean | 1998.127095 | 224.179888 | 521.363128 | 3.095531 | 5.701676 | 5.153631 | 24.017039 | 29.981285 | 40.323464 | 81.833520 | 131.531844 | 2.045251 | 6.565084 | 11.111173 | 17.078212 | 24.921229 | 86.549162 | 400.663687 | 443.396927 |
| std | 13.999832 | 894.253760 | 2244.344674 | 2.631579 | 4.086977 | 4.903811 | 45.202120 | 69.089694 | 74.679326 | 201.824063 | 463.602476 | 0.904595 | 2.522092 | 12.884682 | 25.524157 | 46.428546 | 376.048075 | 482.420046 | 1818.156211 |
| min | 1970.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
| 25% | 1987.000000 | 4.000000 | 3.000000 | 3.000000 | 6.000000 | 5.000000 | 5.000000 | 4.000000 | 2.000000 | 3.000000 | 4.000000 | 2.000000 | 7.000000 | 11.000000 | 9.000000 | 4.000000 | 1.000000 | 402.000000 | 3.000000 |
| 50% | 1997.000000 | 18.000000 | 19.000000 | 3.000000 | 6.000000 | 5.000000 | 24.000000 | 30.000000 | 22.000000 | 17.000000 | 28.000000 | 2.000000 | 7.000000 | 11.000000 | 17.000000 | 25.000000 | 4.000000 | 402.000000 | 18.000000 |
| 75% | 2011.000000 | 106.000000 | 154.000000 | 3.000000 | 6.000000 | 5.000000 | 24.000000 | 30.000000 | 52.000000 | 129.000000 | 131.000000 | 2.000000 | 7.000000 | 11.000000 | 17.000000 | 25.000000 | 28.000000 | 402.000000 | 154.000000 |
| max | 2021.000000 | 16820.000000 | 44576.000000 | 58.000000 | 85.000000 | 106.000000 | 776.000000 | 1409.000000 | 1007.000000 | 4037.000000 | 8739.000000 | 29.000000 | 52.000000 | 275.000000 | 522.000000 | 853.000000 | 6612.000000 | 10337.000000 | 38627.000000 |
# Number of missing values in each column.
df.isnull().sum()
Entity 0 Year 0 Terrorist attacks 0 Terrorism deaths 0 Attack method: Hijacking 0 Attack method: Hostage Taking (Barricade Incident) 0 Attack method: Unarmed Assault 0 Attack method: Facility/Infrastructure Attack 0 Attack method: Hostage Taking (Kidnapping) 0 Attack method: Assassination 0 Attack method: Armed Assault 0 Attack method: Bombing/Explosion 0 Death Age 100+ 0 Death Age: 51-99 0 Death Age : 21-50 0 Death Age : 11-20 0 Death Age : 6-10 0 Death Age : 1-5 0 Terrorist Death Type : Suicide 0 Terrorist Death Type : Killed 0 dtype: int64
# Box plot of the yearly attack counts, to eyeball the spread and outliers.
df['Terrorist attacks'].plot(kind='box')
<Axes: >
# Box plot of the yearly death counts, to eyeball the spread and outliers.
df['Terrorism deaths'].plot(kind='box')
<Axes: >
# Outlier handling: three approaches are tried below
# 1. IQR (interquartile range) fences
# 2. Z-score threshold
# 3. Log transform
# IQR-based outlier masking.
# Quartiles are computed on the numeric columns only: 'Entity' holds strings
# and has no meaningful quantile.
numeric_part = df.select_dtypes(include=[np.number])
Q1 = numeric_part.quantile(0.25)
Q3 = numeric_part.quantile(0.75)
IQR = Q3 - Q1
# A value is an outlier when it lies outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR].
# The whole OR-expression is built first and negated afterwards; negating only
# the "below the lower fence" comparison would leave every high outlier in place.
outlier_cells = (numeric_part < (Q1 - 1.5 * IQR)) | (numeric_part > (Q3 + 1.5 * IQR))
# Work on a copy so df itself is not modified. Boolean-frame indexing keeps the
# shape of the frame and turns rejected cells into NaN; rows are not dropped.
df_iqr = df.copy()
df_iqr[numeric_part.columns] = numeric_part[~outlier_cells]
print(df_iqr)
Entity Year Terrorist attacks Terrorism deaths \
0 Afghanistan 1979 3 53
1 Afghanistan 1988 11 128
2 Afghanistan 1989 10 10
3 Afghanistan 1990 2 12
4 Afghanistan 1991 30 68
... ... ... ... ...
3575 Zimbabwe 2008 5 7
3576 Zimbabwe 2010 1 1
3577 Zimbabwe 2013 3 1
3578 Zimbabwe 2018 2 2
3579 Zimbabwe 2020 1 1
Attack method: Hijacking \
0 3.0
1 3.0
2 3.0
3 3.0
4 3.0
... ...
3575 3.0
3576 3.0
3577 3.0
3578 3.0
3579 3.0
Attack method: Hostage Taking (Barricade Incident) \
0 6.0
1 6.0
2 6.0
3 6.0
4 6.0
... ...
3575 6.0
3576 6.0
3577 6.0
3578 6.0
3579 6.0
Attack method: Unarmed Assault \
0 5.0
1 5.0
2 5.0
3 5.0
4 5.0
... ...
3575 5.0
3576 5.0
3577 5.0
3578 5.0
3579 5.0
Attack method: Facility/Infrastructure Attack \
0 24
1 24
2 24
3 24
4 24
... ...
3575 24
3576 24
3577 1
3578 24
3579 24
Attack method: Hostage Taking (Kidnapping) \
0 1
1 30
2 30
3 30
4 4
... ...
3575 30
3576 30
3577 30
3578 30
3579 30
Attack method: Assassination Attack method: Armed Assault \
0 52 1
1 52 129
2 1 129
3 52 129
4 1 2
... ... ...
3575 52 2
3576 52 1
3577 52 2
3578 1 1
3579 52 1
Attack method: Bombing/Explosion Death Age 100+ Death Age: 51-99 \
0 1 2.0 7.0
1 11 2.0 7.0
2 9 2.0 7.0
3 2 2.0 7.0
4 23 2.0 7.0
... ... ... ...
3575 3 2.0 7.0
3576 131 2.0 7.0
3577 131 2.0 7.0
3578 131 2.0 7.0
3579 131 2.0 7.0
Death Age : 21-50 Death Age : 11-20 Death Age : 6-10 \
0 NaN 17 25
1 NaN 1 2
2 11.0 17 1
3 11.0 17 2
4 11.0 4 25
... ... ... ...
3575 11.0 17 1
3576 11.0 17 25
3577 11.0 17 25
3578 11.0 17 25
3579 11.0 17 25
Death Age : 1-5 Terrorist Death Type : Suicide \
0 2 402.0
1 4 402.0
2 1 402.0
3 3 402.0
4 7 402.0
... ... ...
3575 3 402.0
3576 1 402.0
3577 1 402.0
3578 1 402.0
3579 1 402.0
Terrorist Death Type : Killed
0 53
1 128
2 10
3 12
4 68
... ...
3575 7
3576 1
3577 1
3578 2
3579 1
[3580 rows x 20 columns]
import numpy as np
from scipy import stats

# Z-score outlier detection on the numeric columns of df.
numeric_columns = df.select_dtypes(include=[np.number]).columns
# Absolute z-score of every value (scipy standardises along axis 0, i.e. per column).
zscore = np.abs(stats.zscore(df[numeric_columns]))
print(zscore)
# Cut-off, in standard deviations, above which a value is flagged as an outlier.
threshold = 3
# (row positions, column positions) of every flagged value.
print(np.where(zscore > threshold))
Year Terrorist attacks Terrorism deaths Attack method: Hijacking \
0 1.366428 0.247369 0.208715 0.036307
1 0.723474 0.238422 0.175293 0.036307
2 0.652034 0.239540 0.227877 0.036307
3 0.580595 0.248488 0.226986 0.036307
4 0.509155 0.217172 0.202031 0.036307
... ... ... ... ...
3575 0.705314 0.245132 0.229214 0.036307
3576 0.848193 0.249606 0.231888 0.036307
3577 1.062512 0.247369 0.231888 0.036307
3578 1.419709 0.248488 0.231442 0.036307
3579 1.562587 0.249606 0.231888 0.036307
Attack method: Hostage Taking (Barricade Incident) \
0 0.073004
1 0.073004
2 0.073004
3 0.073004
4 0.073004
... ...
3575 0.073004
3576 0.073004
3577 0.073004
3578 0.073004
3579 0.073004
Attack method: Unarmed Assault \
0 0.031333
1 0.031333
2 0.031333
3 0.031333
4 0.031333
... ...
3575 0.031333
3576 0.031333
3577 0.031333
3578 0.031333
3579 0.031333
Attack method: Facility/Infrastructure Attack \
0 0.000377
1 0.000377
2 0.000377
3 0.000377
4 0.000377
... ...
3575 0.000377
3576 0.000377
3577 0.509274
3578 0.000377
3579 0.000377
Attack method: Hostage Taking (Kidnapping) \
0 0.419532
1 0.000271
2 0.000271
3 0.000271
4 0.376104
... ...
3575 0.000271
3576 0.000271
3577 0.000271
3578 0.000271
3579 0.000271
Attack method: Assassination Attack method: Armed Assault \
0 0.156378 0.400571
1 0.156378 0.233734
2 0.526638 0.233734
3 0.156378 0.233734
4 0.526638 0.395615
... ... ...
3575 0.156378 0.395615
3576 0.156378 0.400571
3577 0.156378 0.395615
3578 0.526638 0.400571
3579 0.156378 0.400571
Attack method: Bombing/Explosion Death Age 100+ Death Age: 51-99 \
0 0.281599 0.050031 0.172467
1 0.260026 0.050031 0.172467
2 0.264341 0.050031 0.172467
3 0.279442 0.050031 0.172467
4 0.234138 0.050031 0.172467
... ... ... ...
3575 0.277285 0.050031 0.172467
3576 0.001147 0.050031 0.172467
3577 0.001147 0.050031 0.172467
3578 0.001147 0.050031 0.172467
3579 0.001147 0.050031 0.172467
Death Age : 21-50 Death Age : 11-20 Death Age : 6-10 \
0 0.784853 0.003065 0.001697
1 0.629609 0.630009 0.493757
2 0.008630 0.003065 0.515299
3 0.008630 0.003065 0.493757
4 0.008630 0.512457 0.001697
... ... ... ...
3575 0.008630 0.003065 0.515299
3576 0.008630 0.003065 0.001697
3577 0.008630 0.003065 0.001697
3578 0.008630 0.003065 0.001697
3579 0.008630 0.003065 0.001697
Death Age : 1-5 Terrorist Death Type : Suicide \
0 0.224867 0.00277
1 0.219548 0.00277
2 0.227527 0.00277
3 0.222208 0.00277
4 0.211569 0.00277
... ... ...
3575 0.222208 0.00277
3576 0.227527 0.00277
3577 0.227527 0.00277
3578 0.227527 0.00277
3579 0.227527 0.00277
Terrorist Death Type : Killed
0 0.214751
1 0.173495
2 0.238405
3 0.237305
4 0.206500
... ...
3575 0.240055
3576 0.243356
3577 0.243356
3578 0.242806
3579 0.243356
[3580 rows x 19 columns]
(array([ 28, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31,
32, 32, 32, 32, 32, 32, 55, 56, 59, 59, 59,
60, 75, 76, 76, 76, 76, 76, 76, 76, 76, 76,
76, 76, 76, 77, 77, 77, 77, 77, 77, 77, 77,
77, 77, 77, 77, 78, 78, 78, 78, 78, 78, 79,
79, 79, 79, 79, 79, 79, 80, 80, 80, 80, 80,
81, 81, 81, 81, 81, 81, 81, 81, 82, 82, 82,
82, 82, 82, 82, 82, 82, 194, 198, 199, 200, 202,
203, 204, 204, 204, 205, 211, 214, 215, 215, 215, 215,
216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 217,
217, 217, 217, 217, 217, 217, 218, 218, 218, 218, 218,
218, 218, 219, 219, 219, 219, 219, 219, 219, 219, 220,
220, 220, 220, 220, 220, 220, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 222, 222,
222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222,
222, 222, 222, 223, 223, 223, 223, 223, 223, 223, 223,
223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 224,
224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 225, 225, 225, 225, 225,
225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225,
225, 225, 226, 226, 226, 226, 226, 226, 226, 226, 226,
226, 226, 226, 226, 226, 226, 226, 226, 226, 227, 227,
227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227,
227, 227, 227, 227, 228, 228, 228, 228, 228, 228, 228,
228, 228, 228, 228, 228, 228, 228, 228, 228, 229, 229,
229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 230,
230, 230, 230, 230, 230, 544, 545, 545, 547, 548, 548,
549, 549, 550, 981, 982, 1071, 1072, 1094, 1096, 1099, 1239,
1349, 1370, 1371, 1371, 1372, 1471, 1473, 1473, 1479, 1479, 1479,
1479, 1480, 1480, 1480, 1480, 1480, 1480, 1480, 1480, 1480, 1480,
1481, 1481, 1481, 1481, 1481, 1481, 1481, 1482, 1482, 1482, 1482,
1482, 1482, 1482, 1482, 1482, 1482, 1482, 1483, 1483, 1907, 1908,
1908, 1919, 1921, 1921, 1926, 1927, 1927, 1927, 1927, 1927, 1927,
1927, 1927, 1927, 1927, 1928, 1928, 1928, 1928, 1928, 1928, 1928,
1928, 1928, 1928, 1928, 1928, 1928, 1928, 1928, 1929, 1929, 1929,
1929, 1929, 1929, 1929, 1929, 1929, 1929, 1929, 1929, 1929, 1929,
1929, 1930, 1930, 1930, 1930, 1930, 1930, 1930, 1930, 1930, 1930,
1930, 1930, 1930, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931,
1931, 1931, 1931, 1931, 1932, 2078, 2143, 2143, 2143, 2143, 2143,
2144, 2144, 2181, 2247, 2390, 2632, 2633, 2634, 2635, 2637, 2682,
2688, 2688, 2702, 2703, 2705, 2705, 2705, 2705, 2705, 2706, 2706,
2706, 2706, 2706, 2706, 2706, 2706, 2707, 2707, 2707, 2707, 2707,
2707, 2707, 2707, 2707, 2707, 2708, 2708, 2708, 2708, 2708, 2708,
2708, 2708, 2708, 2708, 2708, 2708, 2708, 2708, 2709, 2709, 2709,
2709, 2709, 2709, 2709, 2709, 2709, 2709, 2709, 2709, 2710, 2710,
2710, 2710, 2710, 2710, 2710, 2710, 2710, 2710, 2711, 2711, 2711,
2711, 2711, 2711, 2711, 2711, 2711, 2711, 2711, 2711, 2711, 2712,
2712, 2712, 2712, 2712, 2712, 2712, 2712, 2713, 2713, 2713, 2713,
2713, 2713, 2713, 2713, 2775, 2776, 2776, 2896, 2896, 2896, 2896,
2896, 2896, 2896, 2896, 2896, 2896, 2897, 2897, 2897, 2897, 2897,
2897, 2897, 2897, 2897, 2897, 2897, 2898, 2898, 2898, 2898, 2899,
2899, 2899, 2900, 2900, 2900, 2900, 2901, 2901, 2901, 2901, 2901,
2901, 2901, 2901, 2902, 2902, 2902, 2902, 2902, 2902, 2902, 2902,
2902, 2984, 3283, 3352, 3372, 3417, 3418, 3442, 3448, 3448, 3450,
3455, 3456, 3456, 3457, 3457, 3458, 3458, 3458, 3459, 3459, 3459,
3460, 3461, 3461, 3462, 3462, 3462, 3462, 3463, 3463, 3463, 3464,
3464, 3465, 3465, 3466, 3466, 3466, 3466, 3466, 3466, 3467, 3467,
3467, 3467, 3467, 3467, 3467, 3467, 3467, 3467, 3467, 3468, 3468,
3468, 3468, 3468, 3468, 3468, 3468, 3469, 3469, 3469, 3469, 3469,
3469, 3469, 3469, 3469, 3470, 3470, 3470, 3470, 3470, 3470, 3470,
3470, 3470, 3470, 3470, 3470, 3470, 3470, 3471, 3471, 3471, 3471,
3471, 3471, 3471, 3471, 3471, 3471, 3472, 3472, 3472, 3472, 3473,
3473, 3473, 3473, 3474, 3474, 3474, 3474, 3474, 3474, 3474, 3474,
3474, 3474, 3474, 3474, 3475, 3476, 3477, 3478, 3478, 3478, 3478,
3478, 3479, 3480, 3482, 3482, 3483, 3483, 3483, 3483, 3483, 3483,
3483, 3483, 3484, 3484, 3484, 3484, 3484, 3484, 3484, 3484, 3484,
3484, 3484, 3484, 3485, 3485, 3485, 3485, 3485, 3485, 3485, 3485,
3485, 3485, 3485, 3485, 3485, 3485, 3486, 3486, 3486, 3486, 3486,
3486, 3486, 3486, 3486, 3486, 3486, 3486, 3486, 3487, 3487, 3487,
3487, 3487, 3487, 3487, 3487, 3487, 3487, 3487, 3488, 3488, 3488,
3488, 3488, 3488, 3488, 3488, 3488, 3488, 3488, 3489, 3489, 3489,
3489, 3489, 3489, 3489, 3489, 3489, 3489, 3489, 3489, 3489, 3489,
3489, 3490, 3490, 3490, 3490, 3490, 3490, 3490, 3490, 3490, 3490,
3490, 3490, 3490, 3490, 3490, 3490, 3490, 3491, 3491, 3491, 3491,
3491, 3491, 3491, 3491, 3491, 3491, 3491, 3491, 3491, 3491, 3491,
3491, 3491, 3491, 3492, 3492, 3492, 3492, 3492, 3492, 3492, 3492,
3492, 3492, 3492, 3492, 3492, 3492, 3492, 3492, 3492, 3492, 3493,
3493, 3493, 3493, 3493, 3493, 3493, 3493, 3493, 3493, 3493, 3493,
3493, 3493, 3493, 3493, 3493, 3493, 3494, 3494, 3494, 3494, 3494,
3494, 3494, 3494, 3494, 3494, 3494, 3494, 3494, 3494, 3494, 3494,
3494, 3494, 3495, 3495, 3495, 3495, 3495, 3495, 3495, 3495, 3495,
3495, 3495, 3495, 3495, 3495, 3495, 3495, 3495, 3495, 3496, 3496,
3496, 3496, 3496, 3496, 3496, 3496, 3496, 3496, 3496, 3496, 3496,
3496, 3496, 3496, 3497, 3497, 3497, 3497, 3497, 3497, 3497, 3497,
3497, 3497, 3497, 3497, 3497, 3497, 3497, 3497, 3497, 3498, 3498,
3498, 3498, 3498, 3498, 3498, 3498, 3498, 3498, 3498, 3498, 3498,
3498], dtype=int64), array([14, 2, 13, 14, 15, 18, 2, 13, 14, 15, 18, 2, 13, 14, 15, 16, 18,
8, 8, 11, 13, 18, 11, 9, 1, 2, 3, 7, 9, 11, 12, 13, 14, 15,
16, 18, 1, 2, 3, 4, 7, 9, 11, 13, 14, 15, 17, 18, 2, 3, 7,
9, 13, 18, 2, 3, 4, 7, 9, 11, 17, 3, 7, 9, 15, 18, 3, 4,
6, 7, 9, 14, 15, 18, 2, 4, 6, 7, 9, 13, 14, 15, 18, 3, 8,
8, 8, 8, 3, 3, 5, 8, 3, 11, 17, 2, 13, 14, 17, 2, 10, 11,
12, 13, 14, 15, 16, 17, 18, 1, 2, 7, 9, 10, 16, 17, 1, 3, 6,
9, 10, 16, 17, 1, 6, 7, 8, 9, 10, 16, 17, 1, 6, 7, 8, 9,
10, 16, 1, 2, 5, 6, 7, 8, 9, 10, 13, 14, 15, 16, 17, 18, 1,
2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 1,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16,
17, 18, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 13, 14, 15, 16, 17,
18, 1, 2, 6, 7, 8, 9, 10, 13, 14, 15, 16, 18, 1, 8, 10, 15,
16, 18, 4, 4, 8, 4, 11, 13, 11, 13, 4, 4, 4, 6, 6, 6, 5,
6, 4, 5, 5, 5, 6, 5, 17, 11, 17, 10, 15, 16, 17, 1, 2, 10,
11, 13, 14, 15, 16, 17, 18, 2, 10, 14, 15, 16, 17, 18, 1, 2, 10,
11, 12, 13, 14, 15, 16, 17, 18, 10, 17, 8, 5, 8, 17, 11, 17, 10,
1, 2, 8, 9, 10, 14, 15, 16, 17, 18, 1, 2, 3, 6, 7, 8, 9,
10, 11, 13, 14, 15, 16, 17, 18, 1, 2, 3, 5, 7, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 1, 2, 7, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 1, 2, 3, 7, 10, 11, 13, 14, 15, 16, 17, 18, 10, 11, 2,
11, 12, 13, 18, 11, 13, 17, 5, 3, 4, 4, 4, 4, 8, 8, 5, 8,
3, 7, 1, 9, 10, 15, 16, 1, 6, 7, 8, 9, 10, 15, 16, 1, 2,
6, 7, 8, 9, 10, 15, 16, 18, 1, 2, 4, 5, 6, 7, 8, 9, 10,
13, 14, 15, 16, 18, 1, 2, 4, 5, 6, 7, 9, 10, 14, 15, 16, 18,
1, 2, 5, 6, 7, 8, 9, 13, 15, 16, 1, 2, 4, 5, 6, 7, 9,
13, 14, 15, 16, 17, 18, 1, 2, 6, 13, 14, 15, 16, 18, 1, 2, 8,
13, 14, 15, 16, 18, 3, 3, 4, 2, 3, 7, 9, 11, 12, 13, 14, 15,
18, 2, 3, 4, 7, 9, 11, 13, 14, 15, 17, 18, 3, 7, 9, 13, 3,
4, 7, 7, 9, 15, 18, 3, 4, 6, 7, 9, 14, 15, 18, 2, 4, 6,
7, 9, 13, 14, 15, 18, 13, 17, 5, 5, 6, 6, 5, 3, 6, 3, 6,
4, 6, 4, 8, 3, 4, 8, 3, 4, 8, 4, 4, 8, 3, 4, 8, 10,
3, 4, 8, 4, 8, 4, 8, 1, 8, 9, 14, 15, 18, 1, 2, 3, 6,
8, 9, 10, 14, 15, 16, 18, 1, 6, 8, 9, 10, 14, 15, 18, 1, 2,
3, 6, 8, 9, 10, 15, 18, 1, 2, 3, 5, 6, 8, 9, 10, 11, 13,
14, 15, 16, 18, 1, 2, 3, 4, 5, 6, 8, 9, 12, 18, 3, 5, 6,
8, 3, 6, 8, 18, 1, 2, 3, 7, 8, 9, 11, 12, 13, 14, 15, 18,
11, 5, 5, 2, 3, 5, 11, 17, 11, 5, 14, 17, 2, 9, 12, 13, 14,
15, 17, 18, 1, 2, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 1, 2,
3, 5, 6, 7, 9, 10, 13, 14, 15, 16, 17, 18, 1, 2, 3, 6, 7,
9, 10, 12, 13, 15, 16, 17, 18, 1, 2, 3, 5, 6, 7, 8, 9, 10,
16, 17, 1, 2, 6, 7, 8, 9, 10, 15, 16, 17, 18, 1, 2, 4, 5,
6, 7, 8, 9, 10, 13, 14, 15, 16, 17, 18, 1, 2, 3, 4, 5, 6,
7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 1, 2, 3, 4, 5, 6,
7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 1, 2, 3, 4, 5,
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 1, 2, 3,
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 1,
2, 3, 4, 5, 6, 7, 8, 9, 10, 13, 14, 15, 16, 17, 18, 1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 1, 2,
3, 5, 6, 7, 8, 9, 10, 13, 14, 15, 16, 18], dtype=int64))
# Keep only the rows in which every numeric column has an absolute z-score
# below `threshold`; a single flagged value drops the whole row.
df_clean = df[(zscore < threshold).all(axis=1)]
df_clean.head(3)
| Entity | Year | Terrorist attacks | Terrorism deaths | Attack method: Hijacking | Attack method: Hostage Taking (Barricade Incident) | Attack method: Unarmed Assault | Attack method: Facility/Infrastructure Attack | Attack method: Hostage Taking (Kidnapping) | Attack method: Assassination | Attack method: Armed Assault | Attack method: Bombing/Explosion | Death Age 100+ | Death Age: 51-99 | Death Age : 21-50 | Death Age : 11-20 | Death Age : 6-10 | Death Age : 1-5 | Terrorist Death Type : Suicide | Terrorist Death Type : Killed | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 1979 | 3 | 53 | 3 | 6 | 5 | 24 | 1 | 52 | 1 | 1 | 2 | 7 | 1 | 17 | 25 | 2 | 402 | 53 |
| 1 | Afghanistan | 1988 | 11 | 128 | 3 | 6 | 5 | 24 | 30 | 52 | 129 | 11 | 2 | 7 | 3 | 1 | 2 | 4 | 402 | 128 |
| 2 | Afghanistan | 1989 | 10 | 10 | 3 | 6 | 5 | 24 | 30 | 1 | 129 | 9 | 2 | 7 | 11 | 17 | 1 | 1 | 402 | 10 |
# Row/column counts before and after the z-score row filter.
df.shape ,df_clean.shape
((3580, 20), (3417, 20))
# Seaborn box plot of the (unfiltered) 'Terrorist attacks' column of df.
sns.boxplot(data=df['Terrorist attacks'])
<Axes: >
import seaborn as sns
import matplotlib.pyplot as plt

# One box plot per numeric column of df, each drawn in its own 8x6 figure.
numeric_columns = df.select_dtypes(include=[np.number]).columns
for column in numeric_columns:
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.boxplot(x=df[column], ax=ax)
    ax.set_title(f'Boxplot for {column}')
    plt.show()
import seaborn as sns
import matplotlib.pyplot as plt

# Same per-column box plots, but on the base-10 logarithm of each column,
# which compresses the long right tails.
numeric_columns = df.select_dtypes(include=[np.number]).columns
for column in numeric_columns:
    fig, ax = plt.subplots(figsize=(8, 6))
    log_values = np.log10(df[column])
    sns.boxplot(data=log_values, ax=ax)
    ax.set_title(f'Boxplot for Log-Transformed {column}')
    plt.show()
# NOTE(review): the first assignment rebinds df_clean to the unfiltered df, so
# the z-score-filtered frame built earlier is discarded and the second
# assignment is a no-op; df.shape below still reports every original row.
# If the intent was to continue with the cleaned data, only `df = df_clean`
# should run here. TODO confirm.
df_clean = df
df = df_clean
df.shape
(3580, 20)
# Visual check for missing values: heatmap of the boolean null mask of df.
sns.heatmap(df.isnull())
<Axes: >
# Skewness of each numeric column. The string column 'Entity' is excluded
# explicitly: pandas 2.x no longer drops non-numeric columns silently and
# raises on them instead.
df.select_dtypes(include=[np.number]).skew()
Year -0.087564 Terrorist attacks 9.633914 Terrorism deaths 9.499543 Attack method: Hijacking 10.105638 Attack method: Hostage Taking (Barricade Incident) 8.524867 Attack method: Unarmed Assault 10.785614 Attack method: Facility/Infrastructure Attack 9.106834 Attack method: Hostage Taking (Kidnapping) 10.452472 Attack method: Assassination 6.567162 Attack method: Armed Assault 8.823958 Attack method: Bombing/Explosion 10.255642 Death Age 100+ 14.320708 Death Age: 51-99 5.319208 Death Age : 21-50 9.823636 Death Age : 11-20 9.235506 Death Age : 6-10 8.830875 Death Age : 1-5 9.484390 Terrorist Death Type : Suicide 10.605961 Terrorist Death Type : Killed 9.375305 dtype: float64
# Annotated correlation heatmap. Correlations are computed on the numeric
# columns only; the string column 'Entity' cannot be correlated and makes
# DataFrame.corr() raise on pandas 2.x.
plt.figure(figsize=(20,20))
sns.heatmap(df.select_dtypes(include=[np.number]).corr(),annot=True)
plt.show()
# Treat zeros as missing, then drop every row that contains a missing value.
# Both calls mutate df in place.
# NOTE(review): df.describe() above reports a minimum of 1 for every numeric
# column, so this appears to remove nothing on this dataset (the printed
# frame still has 3580 rows). Confirm that zero counts should really be
# discarded before reusing this on other data.
df.replace(0, pd.NA, inplace=True)
df.dropna(inplace=True)
print(df)
Entity Year Terrorist attacks Terrorism deaths \
0 Afghanistan 1979 3 53
1 Afghanistan 1988 11 128
2 Afghanistan 1989 10 10
3 Afghanistan 1990 2 12
4 Afghanistan 1991 30 68
... ... ... ... ...
3575 Zimbabwe 2008 5 7
3576 Zimbabwe 2010 1 1
3577 Zimbabwe 2013 3 1
3578 Zimbabwe 2018 2 2
3579 Zimbabwe 2020 1 1
Attack method: Hijacking \
0 3
1 3
2 3
3 3
4 3
... ...
3575 3
3576 3
3577 3
3578 3
3579 3
Attack method: Hostage Taking (Barricade Incident) \
0 6
1 6
2 6
3 6
4 6
... ...
3575 6
3576 6
3577 6
3578 6
3579 6
Attack method: Unarmed Assault \
0 5
1 5
2 5
3 5
4 5
... ...
3575 5
3576 5
3577 5
3578 5
3579 5
Attack method: Facility/Infrastructure Attack \
0 24
1 24
2 24
3 24
4 24
... ...
3575 24
3576 24
3577 1
3578 24
3579 24
Attack method: Hostage Taking (Kidnapping) \
0 1
1 30
2 30
3 30
4 4
... ...
3575 30
3576 30
3577 30
3578 30
3579 30
Attack method: Assassination Attack method: Armed Assault \
0 52 1
1 52 129
2 1 129
3 52 129
4 1 2
... ... ...
3575 52 2
3576 52 1
3577 52 2
3578 1 1
3579 52 1
Attack method: Bombing/Explosion Death Age 100+ Death Age: 51-99 \
0 1 2 7
1 11 2 7
2 9 2 7
3 2 2 7
4 23 2 7
... ... ... ...
3575 3 2 7
3576 131 2 7
3577 131 2 7
3578 131 2 7
3579 131 2 7
Death Age : 21-50 Death Age : 11-20 Death Age : 6-10 \
0 1 17 25
1 3 1 2
2 11 17 1
3 11 17 2
4 11 4 25
... ... ... ...
3575 11 17 1
3576 11 17 25
3577 11 17 25
3578 11 17 25
3579 11 17 25
Death Age : 1-5 Terrorist Death Type : Suicide \
0 2 402
1 4 402
2 1 402
3 3 402
4 7 402
... ... ...
3575 3 402
3576 1 402
3577 1 402
3578 1 402
3579 1 402
Terrorist Death Type : Killed
0 53
1 128
2 10
3 12
4 68
... ...
3575 7
3576 1
3577 1
3578 2
3579 1
[3580 rows x 20 columns]
# Seaborn pair plot: grid of pairwise relationships between the columns of df.
sns.pairplot(df)
<seaborn.axisgrid.PairGrid at 0x1632318b520>
# Label encoding
from sklearn.preprocessing import LabelEncoder

# Learn the distinct 'Entity' strings, then overwrite the column with their
# integer codes.
lb = LabelEncoder()
lb.fit(df['Entity'])
df['Entity'] = lb.transform(df['Entity'])
df.head(3)
| Entity | Year | Terrorist attacks | Terrorism deaths | Attack method: Hijacking | Attack method: Hostage Taking (Barricade Incident) | Attack method: Unarmed Assault | Attack method: Facility/Infrastructure Attack | Attack method: Hostage Taking (Kidnapping) | Attack method: Assassination | Attack method: Armed Assault | Attack method: Bombing/Explosion | Death Age 100+ | Death Age: 51-99 | Death Age : 21-50 | Death Age : 11-20 | Death Age : 6-10 | Death Age : 1-5 | Terrorist Death Type : Suicide | Terrorist Death Type : Killed | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1979 | 3 | 53 | 3 | 6 | 5 | 24 | 1 | 52 | 1 | 1 | 2 | 7 | 1 | 17 | 25 | 2 | 402 | 53 |
| 1 | 0 | 1988 | 11 | 128 | 3 | 6 | 5 | 24 | 30 | 52 | 129 | 11 | 2 | 7 | 3 | 1 | 2 | 4 | 402 | 128 |
| 2 | 0 | 1989 | 10 | 10 | 3 | 6 | 5 | 24 | 30 | 1 | 129 | 9 | 2 | 7 | 11 | 17 | 1 | 1 | 402 | 10 |
# Build the feature matrix X and the target vector y.
# The target label ends with a trailing space, exactly as it appears in df.columns.
target_column = 'Death Age : 21-50 '
# Drop by column name via `columns=` instead of relying on a boolean axis flag.
X = df.drop(columns=[target_column])
y = df[target_column]
X.shape,y.shape
((3580, 19), (3580,))
# Hold out 20% of the rows for testing; random_state=0 makes the split repeatable.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0, test_size=0.2
)
# Shapes of the four resulting pieces.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))
((2864, 19), (716, 19), (2864,), (716,))
# Train data: every feature column plotted against the target (red dots).
plt.plot(X_train,y_train,'r.',label='train_data')
plt.show()
# Test data: the same view for the held-out rows (green dots).
plt.plot(X_test,y_test,'g.',label='test_data')
plt.show()
# Fit a scikit-learn LinearRegression on the training split.
# NOTE(review): X still contains the label-encoded 'Entity' column, which the
# linear model treats as an ordinary number. Confirm this is intended.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
lm = LinearRegression()
lm.fit(X_train, y_train)
LinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LinearRegression()
# Predict the target for the held-out test rows and print the raw values.
predictions =lm.predict(X_test)
print(predictions)
[ 6.34542853 7.45965361 11.26362744 8.0046228 5.47216335 7.20024016 10.53074081 9.90782094 9.88012909 5.63865155 6.40743658 9.98513289 10.67261718 44.10128656 10.85440976 10.79462136 10.53855239 3.74228348 10.62301021 10.64531829 10.91260626 9.2311895 7.73896046 10.36735899 10.93867998 10.53271379 2.64482018 5.15557352 10.80107625 11.2099333 10.16991956 10.53639985 7.11990012 10.88055418 9.56613826 9.56250619 3.79480831 7.26439594 7.05282044 6.91639606 7.08658484 10.59813744 10.98914717 10.54803243 7.47138649 10.42286575 8.42079387 11.06656653 7.38361234 9.82496211 10.82268851 10.90384973 10.96734196 7.91718343 11.36793205 10.40927594 9.58007292 9.94135114 9.08706409 10.78427165 10.5413204 6.90398953 8.10742799 11.16376724 10.99221472 10.02015126 10.91264743 7.56347081 5.26496393 11.09634457 11.06255804 7.23847329 5.61237547 10.8918172 8.80628856 7.13188138 10.58935641 10.74770528 10.50299531 3.65889276 6.98836045 7.368085 10.01427775 11.23917938 9.77374306 11.23062658 10.8330866 7.41410166 10.6600004 6.97340072 10.84526018 19.48352806 7.39103612 7.14197073 7.27950956 10.30345186 10.99642145 35.47197432 10.5671498 9.82481017 6.55549898 122.78405799 10.65069609 34.0655502 10.15684628 9.03567806 8.36037988 7.56761498 6.44429154 8.91957051 10.97946005 10.50657982 10.04930794 10.6035352 5.49932306 10.39811628 6.87934092 5.00822827 9.75273821 27.84196636 11.33817832 10.46306508 35.87046541 10.39864303 9.95642137 10.79055034 55.80698758 6.6672003 10.98154907 11.16388951 10.72244114 7.14765694 10.3765953 11.20905951 6.70077488 7.2744526 11.09897605 8.85335024 7.34336873 7.50681456 3.14626022 10.72775636 8.55500474 10.70067946 5.66132078 10.82324049 11.83929443 10.52858559 9.4785505 10.74709858 7.20422743 6.47127692 11.14101141 11.3830038 8.60614714 10.67207394 6.8641471 11.13993933 9.31666767 10.03760246 10.87641022 7.91779988 21.89520151 5.36367324 10.47188412 6.67682692 10.38695095 11.1663167 7.36910125 6.4212494 10.02433156 5.55137508 9.63270436 11.0525922 14.26586291 
7.20855018 11.02293493 10.7697306 9.43805835 7.57833043 6.37936911 6.85717696 7.11785546 8.19620918 10.44132933 10.88192173 5.87761423 6.63709009 9.4433606 10.51285507 9.55153029 7.82371418 10.77053601 8.0887614 6.20719406 12.01644632 5.84632232 11.23694136 10.6651852 93.31756649 8.04581386 9.39519634 9.917025 9.83242234 10.15719984 11.03469037 6.9578208 9.89436242 10.62083314 10.84584548 31.33922214 10.59316805 7.09992678 10.61512917 9.62530132 4.74283549 10.99241133 11.07791973 10.93021616 6.66922854 8.09255201 11.08584797 10.60774549 24.30569339 12.83788933 7.42847922 9.39637857 3.53576421 10.17212047 3.88620153 10.27002118 7.50538067 8.58014387 145.33333373 6.9553978 6.98933926 10.10726139 8.9028593 11.15738743 7.5777718 10.36575765 28.00694105 11.01618294 10.18216373 6.2354427 10.68927784 28.12492901 10.72757839 9.66401069 7.67260112 8.78282047 11.12541838 10.61354369 5.08992667 5.83650245 11.11684227 7.89135261 10.28559524 9.63385426 11.1397221 11.04097573 10.19128787 5.18314382 7.79896439 7.86545188 68.26905611 40.8334233 6.83972712 9.86578823 1.76552186 10.05957676 12.05809701 11.09714909 5.26261317 6.47117251 9.35383363 7.98747681 10.67603701 10.9848176 8.17929854 10.77263823 8.31617906 6.48517468 11.00361805 11.02565426 10.8673173 8.40564456 10.86470478 7.58764875 6.63171444 6.64056492 6.70474556 10.44126691 7.4702499 10.03781413 9.38342319 8.93654294 10.50961577 7.27943952 9.47740717 9.45874367 10.73771668 10.60105213 8.11292646 10.32907312 10.81608851 3.73756707 10.44830542 10.94192194 9.98534265 9.44420641 10.72044244 11.37087466 11.11863416 7.2952484 9.85830182 5.91428438 7.40315566 10.33552229 7.54463761 10.4365882 10.62956499 7.91794755 17.56589868 6.57220132 10.67754426 6.49657983 11.59474837 10.20337404 6.62382138 62.83708228 10.69238525 11.07844904 6.55259147 10.20648383 253.42197527 10.69642837 11.03742914 6.21589073 3.8604527 6.39617212 10.4820133 7.32862647 10.58219561 10.74003641 10.426465 10.99843105 10.08150882 11.12230999 11.12409643 
10.59469612 10.81902975 9.67020021 3.28287512 8.21517764 10.47093894 37.06132572 9.97049582 10.27460969 30.86070362 32.2068858 9.91522261 11.20272951 11.13908705 10.43052732 11.10517616 10.20832433 11.03006037 6.30514743 6.16936206 10.22733482 7.06316442 6.43685993 6.27290242 5.96900128 3.45973373 7.53553965 9.57920246 10.15204678 3.71423818 10.4004459 7.54137048 110.73590572 11.10091649 10.6253673 10.3662411 6.40061685 6.55667849 9.86202602 29.44085661 10.61592917 10.80870162 7.38039701 3.27787617 21.08042426 10.79290543 10.11337476 10.9625388 7.54482197 221.16413462 7.30744488 10.8529911 9.76075514 15.30572897 6.23356797 62.18417372 6.20667687 11.2004592 10.27178192 10.6585433 6.49515288 10.92860527 9.96851206 9.77894586 5.36490362 10.78503551 11.50776212 10.91910262 10.71443213 10.03481281 8.4682781 10.73243221 14.28144978 10.05835404 6.17405685 10.747216 14.83706158 9.1550862 10.10581202 10.95051977 10.45532139 9.51714963 9.1601062 7.57123298 9.78586449 7.6241568 8.82328826 6.83445277 10.87775932 11.16829492 9.64481839 6.00807283 7.93065172 3.901459 9.66276178 11.04959082 11.4493952 10.94500505 3.08176438 10.44142715 10.69497826 10.61344479 9.34588221 5.1099483 8.33488175 8.03180675 10.95410901 11.1057475 11.16924037 10.41767052 6.21680586 10.66456849 10.87677869 10.84715422 11.48581319 11.24152084 10.48465541 10.23177206 9.96729078 7.10647017 6.25738489 11.28055972 10.85531679 8.05294819 10.90562666 10.08151874 15.67127007 10.10981087 10.1741601 7.33121959 14.32827475 11.23926149 9.89603469 6.03555612 6.37068548 10.64238543 11.02291393 9.50010249 6.04264202 10.69378963 10.45958367 6.43987093 9.51618916 5.8152954 10.36107874 10.8694766 11.35832593 9.85405967 10.1083243 12.02676921 10.56212933 5.23477153 9.92593842 4.12861501 11.26841551 5.77320274 11.27955673 9.37016633 10.77599325 10.53793851 10.09365739 8.1418654 10.45837241 42.46693388 7.18516197 7.2836389 10.14303108 5.91751113 10.92946631 10.60216003 10.79799375 10.63725083 9.57567993 13.20220119 
10.93920717 10.36111441 9.78606396 7.22079318 10.48374096 6.64568238 9.50273708 10.76720629 10.12167604 9.87158742 6.85939092 11.1190185 8.07320194 9.23189682 9.11758465 6.90511367 5.93169378 11.1521336 6.43737993 7.2321041 7.09827412 10.98545425 10.36934576 10.55418811 2.52949574 8.82417464 10.17351945 10.0339833 9.29456514 5.35130107 7.42179733 9.78790255 7.63844214 10.58132724 9.84573454 5.3504468 10.07067717 8.95427342 8.76194494 11.09761953 11.76550592 11.12254967 8.99742634 7.74634178 9.63536628 10.91905334 6.44153184 18.40087794 9.17224871 46.48657681 6.21251604 7.62419567 7.04573585 7.53369016 8.69023755 5.94542612 7.77361242 9.96403598 10.74154581 10.03918246 15.27059565 10.51832662 10.44080881 11.07738739 9.98758881 10.11415201 6.7685716 10.17213323 10.89192489 6.49242142 47.6325221 5.73436828 4.42658906 7.09619981 7.38742696 10.61545467 10.44500115 10.9341385 8.8117884 6.44524184 11.24833751 7.35452459 6.81952427 7.04101589 11.05672723 9.50851729 10.97102901 10.2347827 6.98525155 9.76693164 9.66201814 44.83619657 10.78978015 8.50279497 10.36485839 9.81857131 10.93344745 6.81264282 9.75777177 6.02567872 9.97344457 9.88726206 7.39324024 10.82159009 10.628997 7.8391426 10.97740248 10.69343873 8.44387262 26.75410613 10.99164848 10.41055065 13.2417764 10.88105091 11.28779505 9.81853727 8.01355075 10.8081634 9.78719889 5.79533315 6.54106027 10.33888976 11.07871191 10.81013054 10.06626252 10.85508448 7.6854125 7.71562997 10.01742472 8.93175489 7.07648531 10.10561771 15.807221 10.75773263 11.11025614 13.1310997 6.62452947 10.8162224 9.99712572 10.85327732 9.19662041 4.08590113 9.79676335 10.98381517 10.61225521 10.06419178 12.0249083 10.41402396 19.23298102 17.30720854 38.1945445 5.3341561 11.05479678 10.43908262 7.40760036 11.01782188 10.85235614 10.73184251 10.29946982 10.64968752 10.98925045 10.06388044 11.17960603 2.69756577 10.93190643 4.07203487 10.32724726 6.53137861 10.4605592 9.32870438 6.81288516 11.03382563 10.4540544 8.98729804 10.79241092 
11.00736891 10.58162642 4.97230303 10.04797371 10.7948059 10.79669653 10.75770387 11.00835522 10.21641973 10.68302812 11.33515063 9.82806754 10.72110729 11.17941873 10.13848319 10.547641 11.15011455]
import matplotlib.pyplot as plt

# Actual vs. predicted target values on the test split.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(y_test, predictions, alpha=0.5)
# Dashed red y = x reference line across the observed range of y_test;
# points on it are perfect predictions.
lowest, highest = min(y_test), max(y_test)
diagonal = [lowest, highest]
ax.plot(diagonal, diagonal, linestyle='--', color='red', linewidth=2)
ax.set_title('Actual vs. Predicted Values')
ax.set_xlabel('Actual Values')
ax.set_ylabel('Predicted Values')
plt.show()

# Mean squared error of the predictions on the test split.
mse = mean_squared_error(y_test, predictions)
print(mse)
18.629270732577435
# Fitted coefficients, one per column of X, in X's column order.
lm.coef_
array([ 2.08536426e-03, 1.19835090e-02, 1.87501675e-03, 1.24858419e-03,
-1.19395090e-01, 3.11258539e-02, 3.26285001e-02, 7.71822838e-04,
2.10841971e-02, -4.89151409e-04, -2.64989151e-04, 1.81432912e-03,
7.88944920e-01, 5.69573315e-01, 2.01737838e-01, 2.78177947e-02,
-1.98655382e-02, 3.33269592e-03, 2.08383876e-03])
# Fitted intercept term of the linear model.
lm.intercept_
-25.000651611691026
# Coefficient of determination (R^2) of the model on the test split.
lm.score(X_test,y_test)
0.9345736672047595
from sklearn.metrics import mean_squared_error,r2_score
# scikit-learn metrics take (y_true, y_pred) in that order. MSE happens to be
# symmetric, but keeping the documented order avoids surprises if this line is
# adapted to an asymmetric metric such as r2_score.
print(mean_squared_error(y_test, predictions))
18.629270732577435